Added BloomCache classes
authorAaron Schulz <aschulz@wikimedia.org>
Wed, 2 Jul 2014 22:55:03 +0000 (15:55 -0700)
committerOri.livneh <ori@wikimedia.org>
Wed, 3 Sep 2014 17:43:21 +0000 (17:43 +0000)
* Implemented a version of BloomCache using Redis
* Added a BloomCheckTitleHasLogs handler class for avoiding
  slow logging table queries when large amounts of 404 pages
  are viewed (by various web crawlers at the moment).

bug: 67439
Change-Id: I26e5034755e3a7208a45991b1cf2f12467679cc1

includes/AutoLoader.php
includes/DefaultSettings.php
includes/cache/bloom/BloomCache.php [new file with mode: 0644]
includes/cache/bloom/BloomCacheRedis.php [new file with mode: 0644]
includes/cache/bloom/BloomFilters.php [new file with mode: 0644]
includes/logging/LogEntry.php
includes/page/Article.php
maintenance/populateBloomCache.php [new file with mode: 0644]

index 661f4d6..0559a8e 100644 (file)
@@ -36,6 +36,10 @@ $wgAutoloadLocalClasses = array(
        'AuthPluginUser' => 'includes/AuthPlugin.php',
        'Autopromote' => 'includes/Autopromote.php',
        'Block' => 'includes/Block.php',
+       'BloomCache' => 'includes/cache/bloom/BloomCache.php',
+       'BloomCacheRedis' => 'includes/cache/bloom/BloomCacheRedis.php',
+       'BloomFilterTitleHasLogs' => 'includes/cache/bloom/BloomFilters.php',
+       'CacheHelper' => 'includes/CacheHelper.php',
        'Category' => 'includes/Category.php',
        'CategoryFinder' => 'includes/CategoryFinder.php',
        'CategoryViewer' => 'includes/CategoryViewer.php',
@@ -65,6 +69,7 @@ $wgAutoloadLocalClasses = array(
        'DumpPipeOutput' => 'includes/Export.php',
        'EditPage' => 'includes/EditPage.php',
        'EmailNotification' => 'includes/UserMailer.php',
+       'EmptyBloomCache' => 'includes/cache/bloom/BloomCache.php',
        'Fallback' => 'includes/Fallback.php',
        'FauxRequest' => 'includes/WebRequest.php',
        'FauxResponse' => 'includes/WebResponse.php',
index 5fc7377..f2042f9 100644 (file)
@@ -2075,6 +2075,28 @@ $wgObjectCaches = array(
        'hash' => array( 'class' => 'HashBagOStuff' ),
 );
 
+/**
+ * Map of bloom filter store names to configuration arrays.
+ *
+ * Example:
+ * $wgBloomFilterStores['main'] = array(
+ *  'cacheId'      => 'main-v1',
+ *  'class'        => 'BloomCacheRedis',
+ *  'redisServers' => array( '127.0.0.1:6379' ),
+ *  'redisConfig'  => array( 'connectTimeout' => 2 )
+ * );
+ *
+ * A primary bloom filter must be created manually.
+ * Example in eval.php:
+ * <code>
+ *     BloomCache::get( 'main' )->init( 'shared', 1000000000, .001 );
+ * </code>
+ * The size should be as large as practical given wiki size and resources.
+ *
+ * @since 1.24
+ */
+$wgBloomFilterStores = array();
+
 /**
  * The expiry time for the parser cache, in seconds.
  * The default is 86400 (one day).
diff --git a/includes/cache/bloom/BloomCache.php b/includes/cache/bloom/BloomCache.php
new file mode 100644 (file)
index 0000000..236db95
--- /dev/null
@@ -0,0 +1,323 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Persistent bloom filter used to avoid expensive lookups
+ *
+ * @since 1.24
+ */
+abstract class BloomCache {
+       /** @var string Unique ID for key namespacing */
+       protected $cacheID;
+
+       /** @var array Map of (id => BloomCache) */
+       protected static $instances = array();
+
+       /**
+        * @param string $id
+        * @return BloomCache
+        */
+       final public static function get( $id ) {
+               global $wgBloomFilterStores;
+
+               if ( !isset( self::$instances[$id] ) ) {
+                       if ( isset( $wgBloomFilterStores[$id] ) ) {
+                               $class = $wgBloomFilterStores[$id]['class'];
+                               self::$instances[$id] = new $class( $wgBloomFilterStores[$id] );
+                       } else {
+                               wfDebug( "No bloom filter store '$id'; using EmptyBloomCache." );
+                               return new EmptyBloomCache( array() );
+                       }
+               }
+
+               return self::$instances[$id];
+       }
+
+       /**
+        * Create a new bloom cache instance from configuration.
+        * This should only be called from within BloomCache.
+        *
+        * @param array $config Parameters include:
+        *   - cacheId : Prefix to all bloom filter names that is unique to this cache.
+        *               It should only consist of alphanumeric, '-', and '_' characters
+        *               and be between 1 and 32 characters long.
+        *               This ID is what avoids collisions if multiple logical caches
+        *               use the same storage system, so this should be set carefully.
+        * @throws MWException If 'cacheId' is missing or does not match the allowed format
+        */
+       public function __construct( array $config ) {
+               // A missing key is treated as an empty ID so that it is rejected by the
+               // validation below instead of raising an undefined index notice first.
+               $this->cacheID = isset( $config['cacheId'] ) ? $config['cacheId'] : '';
+               if ( !preg_match( '!^[a-zA-Z0-9-_]{1,32}$!', $this->cacheID ) ) {
+                       throw new MWException( "Cache ID '{$this->cacheID}' is invalid." );
+               }
+       }
+
+       /**
+        * Check if a member is set in the bloom filter
+        *
+        * A member being set means that it *might* have been added.
+        * A member not being set means it *could not* have been added.
+        *
+        * This abstracts over isHit() to deal with filter updates and readiness.
+        * A class must exist with the name BloomFilter<type> and a static public
+        * mergeAndCheck() method. The latter takes the following arguments:
+        *              (BloomCache $bcache, $domain, $virtualKey, array $status)
+        * The method should return a bool indicating whether to use the filter.
+        *
+        * The 'shared' bloom key must be used for any updates and will be used
+        * for the membership check if the method returns true. Since the key is shared,
+        * the method should never use delete(). The filter cannot be used in cases where
+        * membership in the filter needs to be taken away. In such cases, code *cannot*
+        * use this method - instead, it can directly use the other BloomCache methods
+        * to manage custom filters with their own keys (e.g. not 'shared').
+        *
+        * @param string $domain
+        * @param string $type
+        * @param string $member
+        * @return bool True if set, false if not (also returns true on error)
+        */
+       final public function check( $domain, $type, $member ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               // Without a BloomFilter<type> handler there is nothing to consult, so
+               // fall through and report "might be set".
+               if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
+                       try {
+                               $virtualKey = "$domain:$type";
+
+                               $status = $this->getStatus( $virtualKey );
+                               if ( $status == false ) {
+                                       wfDebug( "Could not query virtual bloom filter '$virtualKey'." );
+                                       // Fail open, as documented: a falsy result here would make
+                                       // callers skip the real lookup based on no information.
+                                       return true;
+                               }
+
+                               // The handler may merge pending updates and decides whether
+                               // the filter is fresh enough to be trusted.
+                               $useFilter = call_user_func_array(
+                                       array( "BloomFilter{$type}", 'mergeAndCheck' ),
+                                       array( $this, $domain, $virtualKey, $status )
+                               );
+
+                               if ( $useFilter ) {
+                                       // isHit() returns null on error; only an explicit false
+                                       // counts as "definitely not a member".
+                                       return ( $this->isHit( 'shared', "$virtualKey:$member" ) !== false );
+                               }
+                       } catch ( MWException $e ) {
+                               MWExceptionHandler::logException( $e );
+                               return true;
+                       }
+               }
+
+               return true;
+       }
+
+       /**
+        * Inform the bloom filter of a new member in order to keep it up to date
+        *
+        * @param string $domain
+        * @param string $type
+        * @param string|array $members
+        * @return bool Success
+        */
+       final public function insert( $domain, $type, $members ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
+                       try {
+                               $virtualKey = "$domain:$type";
+                               $prefixedMembers = array();
+                               foreach ( (array)$members as $member ) {
+                                       $prefixedMembers[] = "$virtualKey:$member";
+                               }
+
+                               return $this->add( 'shared', $prefixedMembers );
+                       } catch ( MWException $e ) {
+                               MWExceptionHandler::logException( $e );
+                               return false;
+                       }
+               }
+
+               return true;
+       }
+
+       /**
+        * Create a new bloom filter at $key (if one does not exist yet)
+        *
+        * @param string $key
+        * @param integer $size Expected number of entries [default: 1000000]
+        * @param float $precision Target false positive rate [default: .001]
+        * @return bool Success
+        */
+       final public function init( $key, $size = 1000000, $precision = .001 ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               return $this->doInit( "{$this->cacheID}:$key", $size, min( .1, $precision ) );
+       }
+
+       /**
+        * Add a member to the bloom filter at $key
+        *
+        * @param string $key
+        * @param string|array $members
+        * @return bool Success
+        */
+       final public function add( $key, $members ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               return $this->doAdd( "{$this->cacheID}:$key", (array)$members );
+       }
+
+       /**
+        * Check if a member is set in the bloom filter.
+        *
+        * A member being set means that it *might* have been added.
+        * A member not being set means it *could not* have been added.
+        *
+        * If this returns true, then the caller usually should do the
+        * expensive check (whatever that may be). It can be avoided otherwise.
+        *
+        * @param string $key
+        * @param string $member
+        * @return bool|null True if set, false if not, null on error
+        */
+       final public function isHit( $key, $member ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               return $this->doIsHit( "{$this->cacheID}:$key", $member );
+       }
+
+       /**
+        * Destroy a bloom filter at $key
+        *
+        * @param string $key
+        * @return bool Success
+        */
+       final public function delete( $key ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               return $this->doDelete( "{$this->cacheID}:$key" );
+       }
+
+       /**
+        * Set the status map of the virtual bloom filter at $key
+        *
+        * @param string $virtualKey
+        * @param array $values Map including some of (lastID, asOfTime, epoch)
+        * @return bool Success
+        */
+       final public function setStatus( $virtualKey, array $values ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               return $this->doSetStatus( "{$this->cacheID}:$virtualKey", $values );
+       }
+
+       /**
+        * Get the status map of the virtual bloom filter at $key
+        *
+        * The map includes:
+        *   - lastID    : the highest ID of the items merged in
+        *   - asOfTime  : UNIX timestamp that the filter is up-to-date as of
+        *   - epoch     : UNIX timestamp that filter started being populated
+        * Unset fields will have a null value.
+        *
+        * @param string $virtualKey
+        * @return array|bool False on failure
+        */
+       final public function getStatus( $virtualKey ) {
+               $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
+
+               return $this->doGetStatus( "{$this->cacheID}:$virtualKey" );
+       }
+
+       /**
+        * Get an exclusive lock on a filter for updates
+        *
+        * @param string $virtualKey
+        * @return ScopedCallback|ScopedLock|null Returns null if acquisition failed
+        */
+       public function getScopedLock( $virtualKey ) {
+               return null;
+       }
+
+       /**
+        * @param string $key
+        * @param integer $size Expected number of entries
+        * @param float $precision
+        * @return bool Success
+        */
+       abstract protected function doInit( $key, $size, $precision );
+
+       /**
+        * @param string $key
+        * @param array $members
+        * @return bool Success
+        */
+       abstract protected function doAdd( $key, array $members );
+
+       /**
+        * @param string $key
+        * @param string $member
+        * @return bool|null
+        */
+       abstract protected function doIsHit( $key, $member );
+
+       /**
+        * @param string $key
+        * @return bool Success
+        */
+       abstract protected function doDelete( $key );
+
+       /**
+        * @param string $virtualKey
+        * @param array $values
+        * @return bool Success
+        */
+       abstract protected function doSetStatus( $virtualKey, array $values );
+
+       /**
+        * @param string $key
+        * @return array|bool
+        */
+       abstract protected function doGetStatus( $key );
+}
+
+class EmptyBloomCache extends BloomCache {
+       public function __construct( array $config ) {
+               parent::__construct( array( 'cacheId' => 'none' ) );
+       }
+
+       protected function doInit( $key, $size, $precision ) {
+               return true;
+       }
+
+       protected function doAdd( $key, array $members ) {
+               return true;
+       }
+
+       protected function doIsHit( $key, $member ) {
+               return true;
+       }
+
+       protected function doDelete( $key ) {
+               return true;
+       }
+
+       protected function doSetStatus( $virtualKey, array $values ) {
+               return true;
+       }
+
+       protected function doGetStatus( $virtualKey ) {
+               return array( 'lastID' => null, 'asOfTime' => null, 'epoch' => null ) ;
+       }
+}
diff --git a/includes/cache/bloom/BloomCacheRedis.php b/includes/cache/bloom/BloomCacheRedis.php
new file mode 100644 (file)
index 0000000..7bafc99
--- /dev/null
@@ -0,0 +1,370 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Bloom filter implemented using Redis
+ *
+ * The Redis server must be >= 2.6 and should have volatile-lru or volatile-ttl
+ * if there is any eviction policy. It should not be allkeys-* in any case. Also,
+ * this can be used in a simple master/slave setup or, preferably, with Redis Sentinel.
+ *
+ * Some bits are based on https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
+ * but are simplified to use a single filter instead of up to 32 filters.
+ *
+ * @since 1.24
+ */
+class BloomCacheRedis extends BloomCache {
+       /** @var RedisConnectionPool */
+       protected $redisPool;
+       /** @var RedisLockManager */
+       protected $lockMgr;
+       /** @var array */
+       protected $servers;
+       /** @var integer Federate each filter into this many redis bitfield objects */
+       protected $segments = 128;
+
+       /**
+        * Parameters include:
+        *   - redisServers : list of servers (address:<port>) (the first is the master)
+        *   - redisConfig  : additional redis configuration
+        *
+        * @param array $config
+        */
+       public function __construct( array $config ) {
+               parent::__construct( $config );
+
+               $redisConf = $config['redisConfig'];
+               $redisConf['serializer'] = 'none'; // manage that in this class
+               $this->redisPool = RedisConnectionPool::singleton( $redisConf );
+               $this->servers = $config['redisServers'];
+               $this->lockMgr = new RedisLockManager( array(
+                       'lockServers'  => array( 'srv1' => $this->servers[0] ),
+                       'srvsByBucket' => array( 0 => array( 'srv1' ) ),
+                       'redisConfig'  => $config['redisConfig']
+               ) );
+       }
+
+       protected function doInit( $key, $size, $precision ) {
+               $conn = $this->getConnection( 'master' );
+               if ( !$conn ) {
+                       return false;
+               }
+
+               // 80000000 items at p = .001 take up 500MB and fit into one value.
+               // Do not hit the 512MB redis value limit by reducing the demands.
+               $size = min( $size, 80000000 * $this->segments );
+               $precision = max( round( $precision, 3 ), .001 );
+               $epoch = microtime( true );
+
+               static $script =
+<<<LUA
+               local kMetadata, kData = unpack(KEYS)
+               local aEntries, aPrec, aEpoch = unpack(ARGV)
+               if redis.call('EXISTS',kMetadata) == 0 or redis.call('EXISTS',kData) == 0 then
+                       redis.call('DEL',kMetadata)
+                       redis.call('HSET',kMetadata,'entries',aEntries)
+                       redis.call('HSET',kMetadata,'precision',aPrec)
+                       redis.call('HSET',kMetadata,'epoch',aEpoch)
+                       redis.call('SET',kData,'')
+                       return 1
+               end
+               return 0
+LUA;
+
+               $res = false;
+               try {
+                       $conn->script( 'load', $script );
+                       $conn->multi( Redis::MULTI );
+                       for ( $i = 0; $i < $this->segments; ++$i ) {
+                               $res = $conn->luaEval( $script,
+                                       array(
+                                               "$key:$i:bloom-metadata", # KEYS[1]
+                                               "$key:$i:bloom-data", # KEYS[2]
+                                               ceil( $size / $this->segments ), # ARGV[1]
+                                               $precision, # ARGV[2]
+                                               $epoch # ARGV[3]
+                                       ),
+                                       2 # number of first argument(s) that are keys
+                               );
+                       }
+                       $results = $conn->exec();
+                       $res = $results && !in_array( false, $results, true );
+               } catch ( RedisException $e ) {
+                       $this->handleException( $conn, $e );
+               }
+
+               return ( $res !== false );
+       }
+
+       protected function doAdd( $key, array $members ) {
+               $conn = $this->getConnection( 'master' );
+               if ( !$conn ) {
+                       return false;
+               }
+
+               static $script =
+<<<LUA
+               local kMetadata, kData = unpack(KEYS)
+               local aMember = unpack(ARGV)
+
+               -- Check if the filter was initialized
+               if redis.call('EXISTS',kMetadata) == 0 or redis.call('EXISTS',kData) == 0 then
+                       return false
+               end
+
+               -- Initial expected entries and desired precision
+               local entries = 1*redis.call('HGET',kMetadata,'entries')
+               local precision = 1*redis.call('HGET',kMetadata,'precision')
+               local hash = redis.sha1hex(aMember)
+
+               -- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
+               -- 0.480453013 = ln(2)^2
+               local bits = math.ceil((entries * math.log(precision)) / -0.480453013)
+
+               -- 0.693147180 = ln(2)
+               local k = math.floor(0.693147180 * bits / entries)
+
+               -- This uses a variation on:
+               -- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
+               -- http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
+               local h = { }
+               h[0] = tonumber(string.sub(hash, 1, 8 ), 16)
+               h[1] = tonumber(string.sub(hash, 9, 16), 16)
+               h[2] = tonumber(string.sub(hash, 17, 24), 16)
+               h[3] = tonumber(string.sub(hash, 25, 32), 16)
+
+               for i=1, k do
+                       local pos = (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits
+                       redis.call('SETBIT', kData, pos, 1)
+               end
+
+               return 1
+LUA;
+
+               $res = false;
+               try {
+                       $conn->script( 'load', $script );
+                       $conn->multi( Redis::PIPELINE );
+                       foreach ( $members as $member ) {
+                               $i = $this->getSegment( $member );
+                               $conn->luaEval( $script,
+                                       array(
+                                               "$key:$i:bloom-metadata", # KEYS[1],
+                                               "$key:$i:bloom-data", # KEYS[2]
+                                               $member # ARGV[1]
+                                       ),
+                                       2 # number of first argument(s) that are keys
+                               );
+                       }
+                       $results = $conn->exec();
+                       $res = $results && !in_array( false, $results, true );
+               } catch ( RedisException $e ) {
+                       $this->handleException( $conn, $e );
+               }
+
+               if ( $res === false ) {
+                       wfDebug( "Could not add to the '$key' bloom filter; it may be missing." );
+               }
+
+               return ( $res !== false );
+       }
+
+       /**
+        * Write status fields of a virtual filter into its metadata hash
+        * on the master server.
+        *
+        * @param string $virtualKey
+        * @param array $values
+        * @return bool Success
+        */
+       protected function doSetStatus( $virtualKey, array $values ) {
+               $conn = $this->getConnection( 'master' );
+               if ( !$conn ) {
+                       // Report failure as a real boolean, like doInit()/doAdd()/doDelete(),
+                       // so that strict "=== false" checks by callers work.
+                       return false;
+               }
+
+               $res = false;
+               try {
+                       $res = $conn->hMSet( "$virtualKey:filter-metadata", $values );
+               } catch ( RedisException $e ) {
+                       $this->handleException( $conn, $e );
+               }
+
+               return ( $res !== false );
+       }
+
+       protected function doGetStatus( $virtualKey ) {
+               $conn = $this->getConnection( 'slave' );
+               if ( !$conn ) {
+                       return false;
+               }
+
+               $res = false;
+               try {
+                       $res = $conn->hGetAll( "$virtualKey:filter-metadata" );
+               } catch ( RedisException $e ) {
+                       $this->handleException( $conn, $e );
+               }
+
+               if ( is_array( $res ) ) {
+                       $res['lastID'] = isset( $res['lastID'] ) ? $res['lastID'] : null;
+                       $res['asOfTime'] = isset( $res['asOfTime'] ) ? $res['asOfTime'] : null;
+                       $res['epoch'] = isset( $res['epoch'] ) ? $res['epoch'] : null;
+               }
+
+               return $res;
+       }
+
+       protected function doIsHit( $key, $member ) {
+               $conn = $this->getConnection( 'slave' );
+               if ( !$conn ) {
+                       return null;
+               }
+
+               static $script =
+<<<LUA
+               local kMetadata, kData = unpack(KEYS)
+               local aMember = unpack(ARGV)
+
+               -- Check if the filter was initialized
+               if redis.call('EXISTS',kMetadata) == 0 or redis.call('EXISTS',kData) == 0 then
+                       return false
+               end
+
+               -- Initial expected entries and desired precision.
+               -- This determines the size of the first and subsequent filters.
+               local entries = redis.call('HGET',kMetadata,'entries')
+               local precision = redis.call('HGET',kMetadata,'precision')
+               local hash = redis.sha1hex(aMember)
+
+               -- This uses a variation on:
+               -- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
+               -- http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
+               local h = { }
+               h[0] = tonumber(string.sub(hash, 1, 8 ), 16)
+               h[1] = tonumber(string.sub(hash, 9, 16), 16)
+               h[2] = tonumber(string.sub(hash, 17, 24), 16)
+               h[3] = tonumber(string.sub(hash, 25, 32), 16)
+
+               -- 0.480453013 = ln(2)^2
+               local bits = math.ceil((entries * math.log(precision)) / -0.480453013)
+
+               -- 0.693147180 = ln(2)
+               local k = math.floor(0.693147180 * bits / entries)
+
+               local found = 1
+               for i=1, k do
+                       local pos = (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits
+                       if redis.call('GETBIT', kData, pos) == 0 then
+                               found = 0
+                               break
+                       end
+               end
+
+               return found
+LUA;
+
+               $res = null;
+               try {
+                       $i = $this->getSegment( $member );
+                       $res = $conn->luaEval( $script,
+                               array(
+                                       "$key:$i:bloom-metadata", # KEYS[1],
+                                       "$key:$i:bloom-data", # KEYS[2]
+                                       $member # ARGV[1]
+                               ),
+                               2 # number of first argument(s) that are keys
+                       );
+               } catch ( RedisException $e ) {
+                       $this->handleException( $conn, $e );
+               }
+
+               return is_int( $res ) ? (bool)$res : null;
+       }
+
+       protected function doDelete( $key ) {
+               $conn = $this->getConnection( 'master' );
+               if ( !$conn ) {
+                       return false;
+               }
+
+               $res = false;
+               try {
+                       $keys = array();
+                       for ( $i = 0; $i < $this->segments; ++$i ) {
+                               $keys[] = "$key:$i:bloom-metadata";
+                               $keys[] = "$key:$i:bloom-data";
+                       }
+                       $res = $conn->delete( $keys );
+               } catch ( RedisException $e ) {
+                       $this->handleException( $conn, $e );
+               }
+
+               return ( $res !== false );
+       }
+
+       public function getScopedLock( $virtualKey ) {
+               $status = Status::newGood();
+               return ScopedLock::factory( $this->lockMgr,
+                       array( $virtualKey ), LockManager::LOCK_EX, $status );
+       }
+
+       /**
+        * @param string $member
+        * @return integer
+        */
+       protected function getSegment( $member ) {
+               return hexdec( substr( md5( $member ), 0, 2 ) ) % $this->segments;
+       }
+
+       /**
+        * Get a connection to the master (first configured server) or, for reads,
+        * to a randomly chosen configured server.
+        *
+        * For reads, the last server that worked is tried first; otherwise up to
+        * three distinct servers are tried at random.
+        *
+        * @param string $to (master/slave)
+        * @return RedisConnRef|bool Returns false on failure
+        */
+       protected function getConnection( $to ) {
+               if ( $to === 'master' ) {
+                       $conn = $this->redisPool->getConnection( $this->servers[0] );
+               } else {
+                       static $lastServer = null;
+
+                       $conn = false;
+                       if ( $lastServer ) {
+                               $conn = $this->redisPool->getConnection( $lastServer );
+                               if ( $conn ) {
+                                       return $conn; // reuse connection
+                               }
+                       }
+                       $servers = $this->servers;
+                       $attempts = min( 3, count( $servers ) );
+                       for ( $i = 1; $i <= $attempts; ++$i ) {
+                               $index = mt_rand( 0, count( $servers ) - 1 );
+                               $conn = $this->redisPool->getConnection( $servers[$index] );
+                               if ( $conn ) {
+                                       $lastServer = $servers[$index];
+                                       return $conn;
+                               }
+                               // Remove the failed server and reindex the list; a plain unset()
+                               // would leave a gap that the next random index could land on.
+                               array_splice( $servers, $index, 1 ); // skip next time
+                       }
+               }
+
+               return $conn;
+       }
+
+       /**
+        * @param RedisConnRef $conn
+        * @param Exception $e
+        */
+       protected function handleException( RedisConnRef $conn, $e ) {
+               $this->redisPool->handleError( $conn, $e );
+       }
+}
diff --git a/includes/cache/bloom/BloomFilters.php b/includes/cache/bloom/BloomFilters.php
new file mode 100644 (file)
index 0000000..9b710d7
--- /dev/null
@@ -0,0 +1,79 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Bloom filter helper that feeds namespace/title pairs from the logging table
+ * into a BloomCache
+ *
+ * @since 1.24
+ */
+class BloomFilterTitleHasLogs {
+       /**
+        * Possibly merge new logging rows into the filter, then report whether the
+        * filter status is recent
+        *
+        * A merge is attempted with a chance of roughly 1 in 3^(5 - age) while the
+        * status is younger than five seconds, and always once it is older. The
+        * merge itself only runs if getScopedLock() returns a truthy value.
+        *
+        * @param BloomCache $bcache
+        * @param string $domain Passed to wfGetDB() as the wiki to read from
+        * @param string $virtualKey Key used for the lock, the status and the member prefix
+        * @param array $status Must contain 'asOfTime'; merge() also reads 'lastID' and 'epoch'
+        * @return bool True if the (possibly refreshed) status is less than 30 seconds old
+        */
+       public static function mergeAndCheck(
+               BloomCache $bcache, $domain, $virtualKey, array $status
+       ) {
+               $age = microtime( true ) - $status['asOfTime']; // seconds
+               // Keep the exponent within [0, 5]. A negative age (asOfTime ahead of the local
+               // clock) would otherwise let pow() grow past what (int) and mt_rand() can handle.
+               $exponent = min( 5, max( 0, 5 - $age ) );
+               $scopedLock = ( mt_rand( 1, (int)pow( 3, $exponent ) ) == 1 )
+                       ? $bcache->getScopedLock( $virtualKey )
+                       : false;
+
+               if ( $scopedLock ) {
+                       $updates = self::merge( $bcache, $domain, $virtualKey, $status );
+                       // merge() returns false on failure; isset() then leaves $age untouched
+                       if ( isset( $updates['asOfTime'] ) ) {
+                               $age = ( microtime( true ) - $updates['asOfTime'] );
+                       }
+               }
+
+               return ( $age < 30 );
+       }
+
+       /**
+        * Add the next batch (at most 1000 rows) of logging entries to the filter
+        * and store the new status
+        *
+        * @param BloomCache $bcache
+        * @param string $domain Passed to wfGetDB() as the wiki to read from
+        * @param string $virtualKey Prefix for the filter members and key for the status
+        * @param array $status Must contain 'lastID' and 'epoch'
+        * @return array|bool The status fields that were stored ('asOfTime', 'epoch' and,
+        *   when rows were found, 'lastID'), or false if $bcache->add() failed
+        */
+       public static function merge(
+               BloomCache $bcache, $domain, $virtualKey, array $status
+       ) {
+               $limit = 1000;
+               $dbr = wfGetDB( DB_SLAVE, array(), $domain );
+               // Fetch the rows that come after the last merged log ID, oldest first
+               $res = $dbr->select( 'logging',
+                       array( 'log_namespace', 'log_title', 'log_id', 'log_timestamp' ),
+                       array( 'log_id > ' . $dbr->addQuotes( (int)$status['lastID'] ) ),
+                       __METHOD__,
+                       array( 'ORDER BY' => 'log_id', 'LIMIT' => $limit )
+               );
+
+               $updates = array();
+               if ( $res->numRows() > 0 ) {
+                       $members = array();
+                       foreach ( $res as $row ) {
+                               $members[] = "$virtualKey:{$row->log_namespace}:{$row->log_title}";
+                       }
+                       // $row still holds the last (highest log_id) row of the batch here
+                       $lastID = $row->log_id;
+                       $lastTime = $row->log_timestamp;
+                       if ( !$bcache->add( 'shared', $members ) ) {
+                               return false;
+                       }
+                       $updates['lastID'] = $lastID;
+                       $updates['asOfTime'] = wfTimestamp( TS_UNIX, $lastTime );
+               } else {
+                       // Nothing newer than lastID was found, so the status is current as of now
+                       $updates['asOfTime'] = microtime( true );
+               }
+
+               // Keep an existing epoch; start one now if none was set
+               $updates['epoch'] = $status['epoch'] ?: microtime( true );
+
+               // NOTE(review): the return value of setStatus() is not checked here
+               $bcache->setStatus( $virtualKey, $updates );
+
+               return $updates;
+       }
+}
index bebe3a9..46c5515 100644 (file)
@@ -533,6 +533,10 @@ class ManualLogEntry extends LogEntryBase {
                        $dbw->insert( 'log_search', $rows, __METHOD__, 'IGNORE' );
                }
 
+               // Update any bloom filter cache
+               $member = $this->getTarget()->getNamespace() . ':' . $this->getTarget()->getDBkey();
+               BloomCache::get( 'main' )->insert( wfWikiId(), 'TitleHasLogs', $member );
+
                return $this->id;
        }
 
index 8970539..b433414 100644 (file)
@@ -1190,15 +1190,18 @@ class Article implements Page {
         */
        public function showMissingArticle() {
                global $wgSend404Code;
+
                $outputPage = $this->getContext()->getOutput();
                // Whether the page is a root user page of an existing user (but not a subpage)
                $validUserPage = false;
 
+               $title = $this->getTitle();
+
                # Show info in user (talk) namespace. Does the user exist? Is he blocked?
-               if ( $this->getTitle()->getNamespace() == NS_USER
-                       || $this->getTitle()->getNamespace() == NS_USER_TALK
+               if ( $title->getNamespace() == NS_USER
+                       || $title->getNamespace() == NS_USER_TALK
                ) {
-                       $parts = explode( '/', $this->getTitle()->getText() );
+                       $parts = explode( '/', $title->getText() );
                        $rootPart = $parts[0];
                        $user = User::newFromName( $rootPart, false /* allow IP users*/ );
                        $ip = User::isIP( $rootPart );
@@ -1222,9 +1225,9 @@ class Article implements Page {
                                                )
                                        )
                                );
-                               $validUserPage = !$this->getTitle()->isSubpage();
+                               $validUserPage = !$title->isSubpage();
                        } else {
-                               $validUserPage = !$this->getTitle()->isSubpage();
+                               $validUserPage = !$title->isSubpage();
                        }
                }
 
@@ -1236,12 +1239,16 @@ class Article implements Page {
                wfRunHooks( 'Article::MissingArticleConditions', array( &$conds, $logTypes ) );
 
                # Show delete and move logs
-               LogEventsList::showLogExtract( $outputPage, $logTypes, $this->getTitle(), '',
-                       array( 'lim' => 10,
-                               'conds' => $conds,
-                               'showIfEmpty' => false,
-                               'msgKey' => array( 'moveddeleted-notice' ) )
-               );
+               $member = $title->getNamespace() . ':' . $title->getDBkey();
+               // @todo: move optimization to showLogExtract()?
+               if ( BloomCache::get( 'main' )->check( wfWikiId(), 'TitleHasLogs', $member ) ) {
+                       LogEventsList::showLogExtract( $outputPage, $logTypes, $title, '',
+                               array( 'lim' => 10,
+                                       'conds' => $conds,
+                                       'showIfEmpty' => false,
+                                       'msgKey' => array( 'moveddeleted-notice' ) )
+                       );
+               }
 
                if ( !$this->mPage->hasViewableContent() && $wgSend404Code && !$validUserPage ) {
                        // If there's no backing content, send a 404 Not Found
@@ -1264,11 +1271,11 @@ class Article implements Page {
                $oldid = $this->getOldID();
                if ( $oldid ) {
                        $text = wfMessage( 'missing-revision', $oldid )->plain();
-               } elseif ( $this->getTitle()->getNamespace() === NS_MEDIAWIKI ) {
+               } elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
                        // Use the default message text
-                       $text = $this->getTitle()->getDefaultMessageText();
-               } elseif ( $this->getTitle()->quickUserCan( 'create', $this->getContext()->getUser() )
-                       && $this->getTitle()->quickUserCan( 'edit', $this->getContext()->getUser() )
+                       $text = $title->getDefaultMessageText();
+               } elseif ( $title->quickUserCan( 'create', $this->getContext()->getUser() )
+                       && $title->quickUserCan( 'edit', $this->getContext()->getUser() )
                ) {
                        $message = $this->getContext()->getUser()->isLoggedIn() ? 'noarticletext' : 'noarticletextanon';
                        $text = wfMessage( $message )->plain();
diff --git a/maintenance/populateBloomCache.php b/maintenance/populateBloomCache.php
new file mode 100644 (file)
index 0000000..40ad5fc
--- /dev/null
@@ -0,0 +1,78 @@
+<?php
+/**
+ * Script to populate a bloom filter with a BloomFilter* class
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that fills a bloom filter by repeatedly calling the
+ * merge() method of a BloomFilter* class
+ *
+ * @ingroup Maintenance
+ */
+class PopulateBloomFilter extends Maintenance {
+       /**
+        * Declare the command line options and the script description
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->addOption( 'cache', 'Bloom cache store name', true, true );
+               $this->addOption( 'filter', 'Bloom filter name', true, true );
+               $this->addOption( 'domain', 'Bloom filter domain', true, true );
+               $this->addOption( 'delay', 'Sleep delay between batches (us)', false, true );
+               $this->mDescription = "Populate the specified bloom filter";
+       }
+
+       /**
+        * Run merge() batches until the filter's 'asOfTime' is no longer behind
+        * the time at which the script started
+        */
+       public function execute() {
+               $type = $this->getOption( 'filter' );
+               $domain = $this->getOption( 'domain' );
+               $cache = BloomCache::get( $this->getOption( 'cache' ) );
+               $pause = $this->getOption( 'delay', 1e5 );
+
+               // Static callback on the filter class named by the 'filter' option
+               $mergeCallback = array( "BloomFilter{$type}", 'merge' );
+               if ( !method_exists( $mergeCallback[0], $mergeCallback[1] ) ) {
+                       $this->error( "No \"BloomFilter{$type}::merge\" method found.", 1 );
+               }
+
+               $virtualKey = "$domain:$type";
+               $status = $cache->getStatus( $virtualKey );
+               if ( !$status ) {
+                       $this->error( "Could not query virtual bloom filter '$virtualKey'.", 1 );
+               }
+
+               $startTime = microtime( true );
+               $this->output( "Current timestamp is '$startTime'.\n" );
+               $this->output( "Current filter timestamp is '{$status['asOfTime']}'.\n" );
+
+               while ( true ) {
+                       // Each batch receives the status returned by the previous one
+                       $status = call_user_func_array(
+                               $mergeCallback,
+                               array( $cache, $domain, $virtualKey, $status )
+                       );
+                       if ( !$status ) {
+                               $this->error( "Could not query virtual bloom filter '$virtualKey'.", 1 );
+                       }
+                       $this->output( "Filter updated to timestamp '{$status['asOfTime']}'.\n" );
+                       usleep( $pause );
+
+                       // Continue only while the filter is still behind the start time
+                       $behind = $status['asOfTime'] && $status['asOfTime'] < $startTime;
+                       if ( !$behind ) {
+                               break;
+                       }
+               }
+
+               $this->output( "Done, filter $type of domain $domain reached time '$startTime'.\n" );
+       }
+}
+
+// Name the maintenance class defined in this file, then pull in the
+// RUN_MAINTENANCE_IF_MAIN include (presumably what runs it when this file is
+// the script entry point; confirm in Maintenance.php)
+$maintClass = "PopulateBloomFilter";
+require_once RUN_MAINTENANCE_IF_MAIN;